###############################################################################
##################3. Calculate Horizontal Inequality###########################
###############################################################################

##Horizontal Inequalities function##

horizontal <- function(data,
                       variable.interest,
                       variable.interest2,
                       variable.party,
                       variable.party2,
                       variable.partisan,
                       variable.partisan2,
                       variable.country,
                       variable.country2,
                       variable.year,
                       variable.year2,
                       variable.weight,
                       variable.weight2,
                       country, 
                       year,
                       gini.var,
                       theil.var,
                       cov.var,
                       sdmw.var){
  #Subset data to requested country, year, and partisans#
  print(country)
  require(dplyr)
  require(lazyeval)
  
  filter_criteria1 <- interp(~y == x, 
                             .values = list(y = as.name(variable.country), x = country))
  filter_criteria2 <- interp(~y == x, 
                             .values = list(y = as.name(variable.year), 
                                            x = year))
  filter_criteria3 <- interp( ~y == 1, 
                              .values = list(y = as.name(variable.partisan)))
  filter_criteria4 <- interp(~!is.na(y) == TRUE, 
                             .values = list(y = as.name(variable.party)))
  partisans <- data %>%
    filter_(filter_criteria2) %>%
    filter_(filter_criteria1) %>%
    filter_(filter_criteria3) %>%
    filter_(filter_criteria4)
  
  print(table(partisans[,variable.party])) 
  
  #Meaning of numbers in CSES Appendix#
  
  library(dplyr)
  
  #Averge Income of each party's partians#
  
  variable.party2 <- enquo(variable.party2)                   
  
  # Create quosure
  
  variable.interest2 <- enquo(variable.interest2)
  
  variable.weight2 <- enquo(variable.weight2)
  
  mean <- data.frame(V1 = partisans %>%
                       group_by(!!variable.party2) %>%  
                       summarize(m = weighted.mean(!!variable.interest2,  !!variable.weight2, na.rm = T)) %>% 
                       pull(m),
                     n.part = partisans %>%
                       group_by(!!variable.party2) %>%
                       summarize(n = n()/nrow(partisans)) %>%
                       pull(n),
                     num = partisans %>%
                       group_by(!!variable.party2) %>%
                       summarize(ns = n()) %>%
                       pull(ns))
  
  means <- mean %>%
    filter(num >= 10)
  print(means)
  
  #Attach party indicators#
  
  means$party <- as.numeric(rownames(means))
  
  print(means)
  
  
  
  
  #Weighted mean income of all respondents#
  
  avg.p <- partisans %>% 
    summarize(m = weighted.mean(!!variable.interest2,  !!variable.weight2, na.rm = T)) %>% pull(m)
  
  print(avg.p)
  
  #Ratio of party means to overall means#
  
  means$ratio <- means$V1/avg.p
  print(means$ratio)
  
  #Difference between party means and overall means#
  
  means$dif <- means$V1 - avg.p
  print(means$dif)
  #Calculate GGini#
  
  nums <- matrix(nrow = length(means$party), ncol = length(means$party))
  nums <- as.data.frame(nums)
  sums <- NA
  print(class(means))
  print(class(nums))
  
  
  #GGini#
  print(class(means$party))
  print(means)
  print(nums)
  for(i in means$party){
    for(j in means$party){
      nums[i,j] <- means$n.part[i]*means$n.part[j]*
        abs(means$V1[i] - means$V1[j])
    }}
  print(nums)
  sums <- rowSums(nums)
  print(sums)
  ggini <- (1/(2 * avg.p)) * sum(sums)
  
  #GTheil#
  
  theil.component <- NA
  gtheil <- NA
  for(i in means$party){
    theil.component[i] <- means$n.part[i] * means$ratio[i] * log(means$ratio[i])
  }
  
  gtheil <- sum(theil.component, na.rm = T)
  
  #GCOV# 
  
  inner <- NA
  gcov <- NA
  for(i in means$party){
    inner[i] <- means$n.part[i] * (means$dif[i]^2)
  }
  gcov <- (1/avg.p)*sqrt(sum(inner, na.rm = T))
  
  #Weighted#
  
  sdmw <- wt.sd(means$V1, means$n.part)
  
  #Attach to pols#
  
  Cses1[Cses1$country.name == country & Cses1$surveyyear == year, gini.var] <<- ggini
  Cses1[Cses1$country.name == country & Cses1$surveyyear == year, theil.var] <<- gtheil
  Cses1[Cses1$country.name == country & Cses1$surveyyear == year, cov.var] <<- gcov
  Cses1[Cses1$country.name == country & Cses1$surveyyear == year, sdmw.var] <<- sdmw
}

#####CSES1#####

##Bring in cses1 data##

load("cses1.rdata")

##Data Pre-Processing for Income##
table(cses1$A2012) 

#8 = don't know, 9 = missing, both to NA

cses1$A2012 <- ifelse(cses1$A2012 == 8 | cses1$A2012 == 9, NA, cses1$A2012)

Cses1<- data.frame(country.name = c("Belgium", "Denmark", "Germany", "Netherlands", "Portugal", "Spain", "Spain", "Sweden", "Great Britain"),
                   surveyyear = c(1999, 1998, 1998, 1998, 2002, 1996, 2000, 1998, 1997))

#Adjustment for weighting#

cses1$weight <-   cses1$A1010_1 * cses1$A1010_2 


#For Income#

for(i in  1:9){
  horizontal(data = cses1, 
             variable.interest = "A2012",
             variable.interest2 = A2012,
             variable.party = "A3005_1",
             variable.party2 = A3005_1,
             variable.partisan = "A3004",
             variable.partisan2 = A3004,
             variable.country = "A1006_NAM",
             variable.country2 = A1006_NAM,
             variable.year = "A1008",
             variable.year2 = A1008,
             variable.weight = "weight",
             variable.weight2 = weight,
             country =Cses1$country.name[i], 
             year = Cses1$surveyyear[i],
             gini.var = "ggini_inc_cs",
             theil.var = "gtheil_inc_cs",
             cov.var = "gcov_inc_cs",
             sdmw.var = "sdmw_inc_cs")
}

rm(cses1)

#####CSES2#####

##Horizontal Inequalities function##

horizontal.2 <- function(data,
                         variable.interest,
                         variable.interest2,
                         variable.party,
                         variable.party2,
                         variable.partisan,
                         variable.partisan2,
                         variable.country,
                         variable.country2,
                         variable.year,
                         variable.year2,
                         variable.weight,
                         variable.weight2,
                         country, 
                         year,
                         gini.var,
                         theil.var,
                         cov.var,
                         sdmw.var){
  #Subset data to requested country, year, and partisans#
  print(country)
  require(dplyr)
  require(lazyeval)
  
  filter_criteria1 <- interp(~y == x, 
                             .values = list(y = as.name(variable.country), x = country))
  filter_criteria2 <- interp(~y == x, 
                             .values = list(y = as.name(variable.year), 
                                            x = year))
  filter_criteria3 <- interp( ~y == 1, 
                              .values = list(y = as.name(variable.partisan)))
  filter_criteria4 <- interp(~!is.na(y) == TRUE, 
                             .values = list(y = as.name(variable.party)))
  partisans <- data %>%
    filter_(filter_criteria2) %>%
    filter_(filter_criteria1) %>%
    filter_(filter_criteria3) %>%
    filter_(filter_criteria4)
  
  print(table(partisans[,variable.party])) #Check if any groups need to be combined#
  
  #Meaning of numbers in CSES Appendix#
  
  library(dplyr)
  
  #Averge Income of each party's partians#
  
  variable.party2 <- enquo(variable.party2)                    # Create quosure
  
  variable.interest2 <- enquo(variable.interest2)
  
  variable.weight2 <- enquo(variable.weight2)
  
  mean <- data.frame(V1 = partisans %>%
                       group_by(!!variable.party2) %>%  
                       summarize(m = weighted.mean(!!variable.interest2,  !!variable.weight2, na.rm = T)) %>% 
                       pull(m),
                     n.part = partisans %>%
                       group_by(!!variable.party2) %>%
                       summarize(n = n()/nrow(partisans)) %>%
                       pull(n),
                     num = partisans %>%
                       group_by(!!variable.party2) %>%
                       summarize(ns = n()) %>%
                       pull(ns))
  
  means <- mean %>%
    filter(num >= 10)
  print(means)
  
  #Attach party indicators#
  
  means$party <- as.numeric(rownames(means))
  
  print(means)
  
  
  
  
  #Weighted mean income of all respondents#
  
  avg.p <- partisans %>% 
    summarize(m = weighted.mean(!!variable.interest2,  !!variable.weight2, na.rm = T)) %>% pull(m)
  
  print(avg.p)
  
  #Ratio of party means to overall means#
  
  means$ratio <- means$V1/avg.p
  print(means$ratio)
  
  #Difference between party means and overall means#
  
  means$dif <- means$V1 - avg.p
  print(means$dif)
  #Calculate GGini#
  
  nums <- matrix(nrow = length(means$party), ncol = length(means$party))
  nums <- as.data.frame(nums)
  sums <- NA
  print(class(means))
  print(class(nums))
  
  
  #GGini#
  print(class(means$party))
  print(means)
  print(nums)
  for(i in means$party){
    for(j in means$party){
      nums[i,j] <- means$n.part[i]*means$n.part[j]*
        abs(means$V1[i] - means$V1[j])
    }}
  print(nums)
  sums <- rowSums(nums)
  print(sums)
  ggini <- (1/(2 * avg.p)) * sum(sums)
  
  #GTheil#
  
  theil.component <- NA
  gtheil <- NA
  for(i in means$party){
    theil.component[i] <- means$n.part[i] * means$ratio[i] * log(means$ratio[i])
  }
  
  gtheil <- sum(theil.component, na.rm = T)
  
  #GCOV# 
  
  inner <- NA
  gcov <- NA
  for(i in means$party){
    inner[i] <- means$n.part[i] * (means$dif[i]^2)
  }
  gcov <- (1/avg.p)*sqrt(sum(inner, na.rm = T))
  
  #Weighted#
  
  sdmw <- wt.sd(means$V1, means$n.part)
  
  #Attach to pols#
  
  Cses2[Cses2$country.name == country & Cses2$surveyyear == year, gini.var] <<- ggini
  Cses2[Cses2$country.name == country & Cses2$surveyyear == year, theil.var] <<- gtheil
  Cses2[Cses2$country.name == country & Cses2$surveyyear == year, cov.var] <<- gcov
  Cses2[Cses2$country.name == country & Cses2$surveyyear == year, sdmw.var] <<- sdmw
}

load("Ccses2.rdata")

##Pre-Processing
table(cses2$B2020)

cses2$B2020[cses2$B2020 == 6 | cses2$B2020 == 7 | cses2$B2020 == 8 | cses2$B2020 == 9]  <- NA

table(cses2$B2020)

Cses2 <- data.frame(country.name = c("Bulgaria", "Czech Republic", "Denmark", "Finland", "France", "Germany", "Great Britain", "Hungary", "Ireland", "Italy", "Netherlands", "Poland", "Portugal", "Romania", "Slovenia", "Spain"),
                    surveyyear = c(2001, 2002, 2001, 2003, 2002, 2002, 2005, 2002, 2002, 2006, 2002, 2001, 2005, 2004, 2004, 2004))

table(cses2$B3029_1[cses2$B1006_NAM == "Germany"])

cses2$B3029_1[cses2$B1006_NAM == "Germany" & (cses2$B3029_1 == 2 | cses2$B3029_1 == 3)] <- 2

#Adjust weights

cses2$weight <-  cses2$B1011_1 * cses2$B1011_2 

#Income#

for(i in 1:16){
  horizontal.2(data = cses2, 
               variable.interest = "B2020",
               variable.interest2 = B2020,
               variable.party = "B3029_1",
               variable.party2 = B3029_1,
               variable.partisan = "B3028",
               variable.partisan2 = B3028,
               variable.country = "B1006_NAM",
               variable.country2 = B1006_NAM,
               variable.year = "B1008",
               variable.year2 = B1008,
               variable.weight = "weight",
               variable.weight2 = weight,
               country =Cses2$country.name[i], 
               year = Cses2$surveyyear[i],
               gini.var = "ggini_inc_cs",
               theil.var = "gtheil_inc_cs",
               cov.var = "gcov_inc_cs",
               sdmw.var = "sdmw_inc_cs")
}

rm(cses2)

#####CSES3#####

horizontal.3 <- function(data,
                         variable.interest,
                         variable.interest2,
                         variable.party,
                         variable.party2,
                         variable.partisan,
                         variable.partisan2,
                         variable.country,
                         variable.country2,
                         variable.year,
                         variable.year2,
                         variable.weight,
                         variable.weight2,
                         country, 
                         year,
                         gini.var,
                         theil.var,
                         cov.var,
                         sdmw.var){
  #Subset data to requested country, year, and partisans#
  print(country)
  require(dplyr)
  require(lazyeval)
  
  filter_criteria1 <- interp(~y == x, 
                             .values = list(y = as.name(variable.country), x = country))
  filter_criteria2 <- interp(~y == x, 
                             .values = list(y = as.name(variable.year), 
                                            x = year))
  filter_criteria3 <- interp( ~y == 1, 
                              .values = list(y = as.name(variable.partisan)))
  filter_criteria4 <- interp(~!is.na(y) == TRUE, 
                             .values = list(y = as.name(variable.party)))
  partisans <- data %>%
    filter_(filter_criteria2) %>%
    filter_(filter_criteria1) %>%
    filter_(filter_criteria3) %>%
    filter_(filter_criteria4)
  
  print(table(partisans[,variable.party])) #Check if any groups need to be combined#
  
  #Meaning of numbers in CSES Appendix#
  
  library(dplyr)
  
  #Averge Income of each party's partians#
  
  variable.party2 <- enquo(variable.party2)                    # Create quosure
  
  variable.interest2 <- enquo(variable.interest2)
  
  variable.weight2 <- enquo(variable.weight2)
  
  mean <- data.frame(V1 = partisans %>%
                       group_by(!!variable.party2) %>%  
                       summarize(m = weighted.mean(!!variable.interest2,  !!variable.weight2, na.rm = T)) %>% 
                       pull(m),
                     n.part = partisans %>%
                       group_by(!!variable.party2) %>%
                       summarize(n = n()/nrow(partisans)) %>%
                       pull(n),
                     num = partisans %>%
                       group_by(!!variable.party2) %>%
                       summarize(ns = n()) %>%
                       pull(ns))
  
  means <- mean %>%
    filter(num >= 10)
  print(means)
  
  #Attach party indicators#
  
  means$party <- as.numeric(rownames(means))
  
  print(means)
  
  
  
  
  #Weighted mean income of all respondents#
  
  avg.p <- partisans %>% 
    summarize(m = weighted.mean(!!variable.interest2,  !!variable.weight2, na.rm = T)) %>% pull(m)
  
  print(avg.p)
  
  #Ratio of party means to overall means#
  
  means$ratio <- means$V1/avg.p
  print(means$ratio)
  
  #Difference between party means and overall means#
  
  means$dif <- means$V1 - avg.p
  print(means$dif)
  #Calculate GGini#
  
  nums <- matrix(nrow = length(means$party), ncol = length(means$party))
  nums <- as.data.frame(nums)
  sums <- NA
  print(class(means))
  print(class(nums))
  
  
  #GGini#
  print(class(means$party))
  print(means)
  print(nums)
  for(i in means$party){
    for(j in means$party){
      nums[i,j] <- means$n.part[i]*means$n.part[j]*
        abs(means$V1[i] - means$V1[j])
    }}
  print(nums)
  sums <- rowSums(nums)
  print(sums)
  ggini <- (1/(2 * avg.p)) * sum(sums)
  
  #GTheil#
  
  theil.component <- NA
  gtheil <- NA
  for(i in means$party){
    theil.component[i] <- means$n.part[i] * means$ratio[i] * log(means$ratio[i])
  }
  
  gtheil <- sum(theil.component, na.rm = T)
  
  #GCOV# 
  
  inner <- NA
  gcov <- NA
  for(i in means$party){
    inner[i] <- means$n.part[i] * (means$dif[i]^2)
  }
  gcov <- (1/avg.p)*sqrt(sum(inner, na.rm = T))
  
  #Weighted#
  
  sdmw <- wt.sd(means$V1, means$n.part)
  
  #Attach to pols#
  
  Cses3[Cses3$country.name == country & Cses3$surveyyear == year, gini.var] <<- ggini
  Cses3[Cses3$country.name == country & Cses3$surveyyear == year, theil.var] <<- gtheil
  Cses3[Cses3$country.name == country & Cses3$surveyyear == year, cov.var] <<- gcov
  Cses3[Cses3$country.name == country & Cses3$surveyyear == year, sdmw.var] <<- sdmw
}

load("cses3.rdata")

#Pre-Processing#

table(cses3$C2020)

cses3$C2020[cses3$C2020 == 6 | cses3$C2020 == 7 | cses3$C2020 == 8 | cses3$C2020 == 9]  <- NA

table(cses3$C2020)

Cses3 <- data.frame(country.name = c("Austria", "Czech Republic", "Czech Republic", "Denmark", "Estonia", "Finland", "Finland", "France", "Germany", "Germany", "Greece", "Ireland", "Netherlands", "Netherlands", "Poland", "Poland", "Portugal", "Romania", "Slovakia", "Slovenia", "Spain", "Sweden"),
                    surveyyear = c(2008, 2006, 2010, 2007, 2011, 2007, 2011, 2007, 2005, 2009, 2009, 2007, 2006, 2010, 2005, 2007, 2009, 2009, 2010, 2008, 2008, 2006))

#Adjust weights

cses3$weight <- cses3$C1010_1 * cses3$C1010_2 

#Income#

for(i in 1:22){
  horizontal.3(data = cses3, 
               variable.interest = "C2020",
               variable.interest2 = C2020,
               variable.party = "C3020_3",
               variable.party2 = C3020_3,
               variable.partisan = "C3020_1",
               variable.partisan2 = C3020_1,
               variable.country = "C1006_NAM",
               variable.country2 = C1006_NAM,
               variable.year = "C1008",
               variable.year2 = C1008,
               variable.weight = "weight",
               variable.weight2 = weight,
               country = Cses3$country.name[i],
               year = Cses3$surveyyear[i],
               gini.var = "ggini_inc_cs",
               theil.var = "gtheil_inc_cs",
               cov.var = "gcov_inc_cs",
               sdmw.var = "sdmw_inc_cs")
}

rm(cses3)

#####CSES4#####

horizontal.4 <- function(data,
                         variable.interest,
                         variable.interest2,
                         variable.party,
                         variable.party2,
                         variable.partisan,
                         variable.partisan2,
                         variable.country,
                         variable.country2,
                         variable.year,
                         variable.year2,
                         variable.weight,
                         variable.weight2,
                         country, 
                         year,
                         gini.var,
                         theil.var,
                         cov.var,
                         sdmw.var){
  #Subset data to requested country, year, and partisans#
  print(country)
  require(dplyr)
  require(lazyeval)
  
  filter_criteria1 <- interp(~y == x, 
                             .values = list(y = as.name(variable.country), x = country))
  filter_criteria2 <- interp(~y == x, 
                             .values = list(y = as.name(variable.year), 
                                            x = year))
  filter_criteria3 <- interp( ~y == 1, 
                              .values = list(y = as.name(variable.partisan)))
  filter_criteria4 <- interp(~!is.na(y) == TRUE, 
                             .values = list(y = as.name(variable.party)))
  partisans <- data %>%
    filter_(filter_criteria2) %>%
    filter_(filter_criteria1) %>%
    filter_(filter_criteria3) %>%
    filter_(filter_criteria4)
  
  print(table(partisans[,variable.party])) #Check if any groups need to be combined#
  
  #Meaning of numbers in CSES Appendix#
  
  library(dplyr)
  
  #Averge Income of each party's partians#
  
  variable.party2 <- enquo(variable.party2)                    # Create quosure
  
  variable.interest2 <- enquo(variable.interest2)
  
  variable.weight2 <- enquo(variable.weight2)
  
  mean <- data.frame(V1 = partisans %>%
                       group_by(!!variable.party2) %>%  
                       summarize(m = weighted.mean(!!variable.interest2,  !!variable.weight2, na.rm = T)) %>% 
                       pull(m),
                     n.part = partisans %>%
                       group_by(!!variable.party2) %>%
                       summarize(n = n()/nrow(partisans)) %>%
                       pull(n),
                     num = partisans %>%
                       group_by(!!variable.party2) %>%
                       summarize(ns = n()) %>%
                       pull(ns))
  
  means <- mean %>%
    filter(num >= 10)
  print(means)
  
  #Attach party indicators#
  
  means$party <- as.numeric(rownames(means))
  
  print(means)
  
  
  
  
  #Weighted mean income of all respondents#
  
  avg.p <- partisans %>% 
    summarize(m = weighted.mean(!!variable.interest2,  !!variable.weight2, na.rm = T)) %>% pull(m)
  
  print(avg.p)
  
  #Ratio of party means to overall means#
  
  means$ratio <- means$V1/avg.p
  print(means$ratio)
  
  #Difference between party means and overall means#
  
  means$dif <- means$V1 - avg.p
  print(means$dif)
  #Calculate GGini#
  
  nums <- matrix(nrow = length(means$party), ncol = length(means$party))
  nums <- as.data.frame(nums)
  sums <- NA
  print(class(means))
  print(class(nums))
  
  
  #GGini#
  print(class(means$party))
  print(means)
  print(nums)
  for(i in means$party){
    for(j in means$party){
      nums[i,j] <- means$n.part[i]*means$n.part[j]*
        abs(means$V1[i] - means$V1[j])
    }}
  print(nums)
  sums <- rowSums(nums)
  print(sums)
  ggini <- (1/(2 * avg.p)) * sum(sums)
  
  #GTheil#
  
  theil.component <- NA
  gtheil <- NA
  for(i in means$party){
    theil.component[i] <- means$n.part[i] * means$ratio[i] * log(means$ratio[i])
  }
  
  gtheil <- sum(theil.component, na.rm = T)
  
  #GCOV# 
  
  inner <- NA
  gcov <- NA
  for(i in means$party){
    inner[i] <- means$n.part[i] * (means$dif[i]^2)
  }
  gcov <- (1/avg.p)*sqrt(sum(inner, na.rm = T))
  
  #Weighted#
  
  sdmw <- wt.sd(means$V1, means$n.part)
  
  #Attach to pols#
  
  Cses4[Cses4$country.name == country & Cses4$surveyyear == year, gini.var] <<- ggini
  Cses4[Cses4$country.name == country & Cses4$surveyyear == year, theil.var] <<- gtheil
  Cses4[Cses4$country.name == country & Cses4$surveyyear == year, cov.var] <<- gcov
  Cses4[Cses4$country.name == country & Cses4$surveyyear == year, sdmw.var] <<- sdmw
}

load("cses4.rdata")

#Pre-Processing#
table(cses4$D2020)

cses4$D2020[cses4$D2020 == 6 | cses4$D2020 == 7 | cses4$D2020 == 8 | cses4$D2020 == 9]  <- NA

table(cses4$D2020)

Cses4 <- data.frame(country.name = c("Austria", "Bulgaria", "Czech Republic", "Germany", "Great Britain", "Greece", "Greece", "Ireland", "Latvia", "Latvia", "Poland", "Portugal", "Romania", "Romania", "Slovakia", "Slovenia", "Sweden"),
                    surveyyear = c(2013, 2014, 2013, 2013,  2015, 2012, 2015, 2011, 2011, 2014, 2011, 2015, 2012, 2014, 2016, 2011, 2014))

#Adjust weights

cses4$weight <-  cses4$D1010_1 * cses4$D1010_2 

#For Income#

for(i in 1:17){
  horizontal.4(data = cses4, 
               variable.interest = "D2020",
               variable.interest2 = D2020,
               variable.party = "D3018_3",
               variable.party2 = D3018_3,
               variable.partisan = "D3018_1",
               variable.partisan2 = D3018_1,
               variable.country = "D1006_NAM",
               variable.country2 = D1006_NAM,
               variable.year = "D1008",
               variable.year2 = D1008,
               variable.weight = "weight",
               variable.weight2 = weight,
               country =Cses4$country.name[i], 
               year = Cses4$surveyyear[i],
               gini.var = "ggini_inc_cs",
               theil.var = "gtheil_inc_cs",
               cov.var = "gcov_inc_cs",
               sdmw.var = "sdmw_inc_cs")
}

rm(cses4)

CSES <- rbind(Cses1, Cses2, Cses3, Cses4)

####ESS 1 -7####

#Import Data#

ess17 <- import("ESS Data/ESS1-7e01.dta")

#Make country label for merging and identification#
ess17 <- ess17 %>%
  mutate(country.name = case_when(
    cntry == "AT" ~ "Austria",
    cntry == "BE" ~ "Belgium",
    cntry == "BG" ~ "Bulgaria",
    cntry == "CY" ~ "Cyprus",
    cntry == "CZ" ~ "Czech Republic",
    cntry == "DE" ~ "Germany",
    cntry == "DK" ~ "Denmark",
    cntry == "EE" ~ "Estonia",
    cntry == "ES" ~ "Spain",
    cntry == "FI" ~ "Finland",
    cntry == "FR" ~ "France",
    cntry == "GB" ~ "Great Britain",
    cntry == "GR" ~ "Greece",
    cntry == "HR" ~ "Croatia",
    cntry == "HU" ~ "Hungary",
    cntry == "IE" ~ "Ireland",
    cntry == "IT" ~ "Italy",
    cntry == "LT" ~ "Lithuania",
    cntry == "NL" ~ "Netherlands",
    cntry == "NO" ~ "Norway",
    cntry == "PL" ~ "Poland",
    cntry == "PT" ~ "Portugal",
    cntry == "SE" ~ "Sweden",
    cntry == "SI" ~ "Slovenia",
    cntry == "SK" ~ "Slovakia"
  ), #Make survey year identifier
  surveyyear = case_when(
    essround == 1 ~ 2002,
    essround == 2 ~ 2004,
    essround == 3 ~ 2006,
    essround == 4 ~ 2008,
    essround == 5 ~ 2010,
    essround == 6 ~ 2012,
    essround == 7 ~ 2014
  ), #Harmonize income in waves 4-7 with CSES
  inc47 = case_when(
    hinctnta == 1 | hinctnta == 2 ~ 1,
    hinctnta == 3 | hinctnta == 4 ~ 2,
    hinctnta == 5 | hinctnta == 6 ~ 3,
    hinctnta == 7 | hinctnta == 8 ~ 4,
    hinctnta == 9 | hinctnta == 10 ~ 5
  )
  )

ess17 <- ess17 %>%
  group_by(country.name, surveyyear) %>%
  mutate(incqui = ntile(hinctnt, 5))



#Which party close to#
ess17$clpart <- apply(ess17[, 182:310], 1, max, na.rm = T)
ess17$clpart[ess17$clpart == -Inf] <- NA

ess17$income <- apply(ess17[,c(960,961)], 1, max, na.rm = T)
ess17$income[ess17$income == -Inf] <- NA

ess17 <- as.data.frame(ess17)

#Make data frame to hold estimates by country-survey year

ess_waves <- data.frame(country.name = c(rep("Austria", 3), 
                                         rep("Belgium", 3),
                                         rep("Bulgaria", 3),
                                         "Croatia",
                                         rep("Cyprus", 2),
                                         rep("Czech Republic", 4),
                                         rep("Denmark", 6),
                                         rep("Estonia", 6),
                                         rep("Finland", 6),
                                         rep("France", 4),
                                         rep("Germany", 7),
                                         rep("Great Britain", 5),
                                         rep("Greece", 4),
                                         rep("Hungary", 4),
                                         rep("Ireland", 5),
                                         rep("Italy", 2),
                                         "Lithuania",
                                         rep("Netherlands", 4),
                                         "Norway",
                                         rep("Poland", 7),
                                         rep("Portugal", 7),
                                         rep("Slovakia", 3),
                                         rep("Slovenia", 3),
                                         rep("Spain", 5),
                                         rep("Sweden", 5)),
                        surveyyear = c(2002,2006,2014,
                                       2004,2010,2014,
                                       2006,2008, 2010,
                                       2010, 
                                       2010,2012,
                                       2002,2010,2012,2014,
                                       2002,2004,2006,2008,2010,2012,
                                       2004,2006,2008,2010,2012,2014,
                                       2002,2004,2006,2008,2010,2012,
                                       2002,2006,2008,2012,
                                       2002,2004,2006,2008,2010,2012,2014,
                                       2002,2004,2006,2010,2014,
                                       2002,2004,2008,2010,
                                       2002,2006,2010,2014,
                                       2002,2006,2008,2010,2012,
                                       2002,2012,
                                       2012,
                                       2002,2006,2010,2012,
                                       2012,
                                       2002,2004,2006,2008,2010,2012,2014,
                                       2002,2004,2006,2008,2010,2012,2014,
                                       2006,2010,2012,
                                       2004,2008,2014,
                                       2002,2004,2008,2010,2012,
                                       2002,2004,2006,2010,2014))


#detach(package:plyr, T)

horizontal2 <- function(data,
                        variable.interest,
                        variable.interest2,
                        variable.party,
                        variable.party2,
                        variable.partisan,
                        variable.partisan2,
                        variable.country,
                        variable.country2,
                        variable.year,
                        variable.year2,
                        variable.weight,
                        variable.weight2,
                        country, 
                        year,
                        gini.var,
                        theil.var,
                        cov.var,
                        sdmw.var){
  #Subset data to requested country, year, and partisans#
  print(country)
  require(dplyr)
  require(lazyeval)
  
  filter_criteria1 <- interp(~y == x, 
                             .values = list(y = as.name(variable.country), x = country))
  filter_criteria2 <- interp(~y == x, 
                             .values = list(y = as.name(variable.year), 
                                            x = year))
  filter_criteria3 <- interp( ~y == 1, 
                              .values = list(y = as.name(variable.partisan)))
  filter_criteria4 <- interp(~!is.na(y) == TRUE, 
                             .values = list(y = as.name(variable.party)))
  partisans <- data %>%
    filter_(filter_criteria2) %>%
    filter_(filter_criteria1) %>%
    filter_(filter_criteria3) %>%
    filter_(filter_criteria4)
  
  
  print(table(partisans[,variable.party])) #Check if any groups need to be combined#
  
  #Meaning of numbers in CSES Appendix#
  
  library(dplyr)
  
  #Averge Income of each party's partians#
  
  variable.party2 <- enquo(variable.party2)                    # Create quosure
  
  variable.interest2 <- enquo(variable.interest2)
  
  variable.weight2 <- enquo(variable.weight2)
  
  mean <- data.frame(V1 = partisans %>%
                       group_by(!!variable.party2) %>%  
                       summarize(m = weighted.mean(!!variable.interest2,  !!variable.weight2, na.rm = T)) %>% 
                       pull(m),
                     n.part = partisans %>%
                       group_by(!!variable.party2) %>%
                       summarize(n = n()/nrow(partisans)) %>%
                       pull(n),
                     num = partisans %>%
                       group_by(!!variable.party2) %>%
                       summarize(ns = n()) %>%
                       pull(ns))
  
  means <- mean %>%
    filter(num >= 10)
  print(means)
  
  #Attach party indicators#
  
  means$party <- as.numeric(rownames(means))
  
  print(means)
  
  
  
  
  #Weighted mean income of all respondents#
  
  avg.p <- partisans %>% 
    summarize(m = weighted.mean(!!variable.interest2,  !!variable.weight2, na.rm = T)) %>% pull(m)
  
  print(avg.p)
  
  #Ratio of party means to overall means#
  
  means$ratio <- means$V1/avg.p
  print(means$ratio)
  
  #Difference between party means and overall means#
  
  means$dif <- means$V1 - avg.p
  print(means$dif)
  #Calculate GGini#
  
  nums <- matrix(nrow = length(means$party), ncol = length(means$party))
  nums <- as.data.frame(nums)
  sums <- NA
  print(class(means))
  print(class(nums))
  
  
  #GGini#
  print(class(means$party))
  print(means)
  print(nums)
  for(i in means$party){
    for(j in means$party){
      nums[i,j] <- means$n.part[i]*means$n.part[j]*
        abs(means$V1[i] - means$V1[j])
    }}
  print(nums)
  sums <- rowSums(nums)
  print(sums)
  ggini <- (1/(2 * avg.p)) * sum(sums)
  
  #GTheil#
  
  theil.component <- NA
  gtheil <- NA
  for(i in means$party){
    theil.component[i] <- means$n.part[i] * means$ratio[i] * log(means$ratio[i])
  }
  
  gtheil <- sum(theil.component, na.rm = T)
  
  #GCOV# 
  
  inner <- NA
  gcov <- NA
  for(i in means$party){
    inner[i] <- means$n.part[i] * (means$dif[i]^2)
  }
  gcov <- (1/avg.p)*sqrt(sum(inner, na.rm = T))
  
  #Weighted#
  
  sdmw <- wt.sd(means$V1, means$n.part)
  
  #Attach to pols#
  
  ess_waves[ess_waves$country.name == country & ess_waves$surveyyear == year, gini.var] <<- ggini
  ess_waves[ess_waves$country.name == country & ess_waves$surveyyear == year, theil.var] <<- gtheil
  ess_waves[ess_waves$country.name == country & ess_waves$surveyyear == year, cov.var] <<- gcov
  ess_waves[ess_waves$country.name == country & ess_waves$surveyyear == year, sdmw.var] <<- sdmw
}

###Pre-Processing for partisanship###

#All non responses to NA#

ess17$clsprty[ess17$clsprty == 7 | ess17$clsprty == 8 | ess17$clsprty == 9] <- NA


#Income#

n <- c(1:101)
a <- which(ess_waves$surveyyear == 2002 & (ess_waves$country.name == "Hungary" | ess_waves$country.name == "Ireland" | ess_waves$country.name == "France" ))
a2 <- which(ess_waves$surveyyear == 2004 & ess_waves$country.name == "Estonia")
a3 <- which(ess_waves$surveyyear == 2006 & (ess_waves$country.name == "Estonia" | ess_waves$country.name == "Hungary"))
b <- which(ess_waves$surveyyear == 2008 & (ess_waves$country.name == "Bulgaria" | ess_waves$country.name == "Cyprus" | ess_waves$country.name == "Slovakia" | ess_waves$country.name == "Belgium"))
c <- which(ess_waves$surveyyear == 2010 & ess_waves$country.name == "Portugal")
d <- which(ess_waves$surveyyear == 2014 & ess_waves$country.name == "Estonia")
n <- n[-c(a, a2, a3, b,c,d)]

for(i in n){
  horizontal2(data = ess17,
              variable.interest = "income",
              variable.interest2 = income,
              variable.party = "clpart",
              variable.party2 =clpart,
              variable.partisan = "clsprty",
              variable.partisan2 = clsprty,
              variable.country = "country.name",
              variable.country2 = country.name,
              variable.year = "surveyyear",
              variable.year2 = surveyyear,
              variable.weight = "pspwght",
              variable.weight2 = pspwght,
              country = ess_waves[i,1], 
              year = ess_waves[i,2],
              gini.var = "ggini_inc_es",
              theil.var= "gtheil_inc_es",
              cov.var = "gcov_inc_es",
              sdmw.var = "sdmw_inc_es")
}

rm(ess17)

####ESS8####

ess8 <- import("ESS Data/ESS8e02.dta")

ess8 <- ess8 %>%
  mutate(country.name = case_when(
    cntry == "AT" ~ "Austria",
    cntry == "BE" ~ "Belgium",
    cntry == "BG" ~ "Bulgaria",
    cntry == "CZ" ~ "Czech Republic",
    cntry == "DE" ~ "Germany",
    cntry == "EE" ~ "Estonia",
    cntry == "ES" ~ "Spain",
    cntry == "FI" ~ "Finland",
    cntry == "FR" ~ "France",
    cntry == "GB" ~ "Great Britain",
    cntry == "HR" ~ "Croatia",
    cntry == "HU" ~ "Hungary",
    cntry == "IE" ~ "Ireland",
    cntry == "IT" ~ "Italy",
    cntry == "LT" ~ "Lithuania",
    cntry == "NL" ~ "Netherlands",
    cntry == "NO" ~ "Norway",
    cntry == "PL" ~ "Poland",
    cntry == "PT" ~ "Portugal",
    cntry == "SE" ~ "Sweden",
    cntry == "SI" ~ "Slovenia"
  ), #Make survey year identifier
  surveyyear = 2016, #Harmonize income 
  inc47 = case_when(
    hinctnta == 1 | hinctnta == 2 ~ 1,
    hinctnta == 3 | hinctnta == 4 ~ 2,
    hinctnta == 5 | hinctnta == 6 ~ 3,
    hinctnta == 7 | hinctnta == 8 ~ 4,
    hinctnta == 9 | hinctnta == 10 ~ 5
  ))

#Which party close to#
ess8$clpart <- apply(ess8[, 61:83], 1, max, na.rm = T)

#Storage dataframe
ess_wave8 <- data.frame(country.name = c("Austria", "Belgium", "Czech Republic", "Estonia", "Finland", "France", "Germany", "Great Britain", "Hungary", "Ireland", "Italy", "Lithuania", "Netherlands", "Norway", "Poland", "Portugal", "Slovenia", "Spain", "Sweden"),
                        surveyyear = rep(2016, 19))

ess8$clsprty[ess8$clsprty == 7 | ess8$clsprty == 8 | ess8$clsprty == 9] <- NA

#final horizontal#

horizontal3 <- function(data,
                        variable.interest,
                        variable.interest2,
                        variable.party,
                        variable.party2,
                        variable.partisan,
                        variable.partisan2,
                        variable.country,
                        variable.country2,
                        variable.year,
                        variable.year2,
                        variable.weight,
                        variable.weight2,
                        country, 
                        year,
                        gini.var,
                        theil.var,
                        cov.var,
                        sdmw.var){
  #Subset data to requested country, year, and partisans#
  print(country)
  require(dplyr)
  require(lazyeval)
  
  filter_criteria1 <- interp(~y == x, 
                             .values = list(y = as.name(variable.country), x = country))
  filter_criteria2 <- interp(~y == x, 
                             .values = list(y = as.name(variable.year), 
                                            x = year))
  filter_criteria3 <- interp( ~y == 1, 
                              .values = list(y = as.name(variable.partisan)))
  filter_criteria4 <- interp(~!is.na(y) == TRUE, 
                             .values = list(y = as.name(variable.party)))
  partisans <- data %>%
    filter_(filter_criteria2) %>%
    filter_(filter_criteria1) %>%
    filter_(filter_criteria3) %>%
    filter_(filter_criteria4)
  
  print(table(partisans[,variable.party])) #Check if any groups need to be combined#
  
  #Meaning of numbers in CSES Appendix#
  
  library(dplyr)
  
  #Averge Income of each party's partians#
  
  variable.party2 <- enquo(variable.party2)                    # Create quosure
  
  variable.interest2 <- enquo(variable.interest2)
  
  variable.weight2 <- enquo(variable.weight2)
  
  mean <- data.frame(V1 = partisans %>%
                       group_by(!!variable.party2) %>%  
                       summarize(m = weighted.mean(!!variable.interest2,  !!variable.weight2, na.rm = T)) %>% 
                       pull(m),
                     n.part = partisans %>%
                       group_by(!!variable.party2) %>%
                       summarize(n = n()/nrow(partisans)) %>%
                       pull(n),
                     num = partisans %>%
                       group_by(!!variable.party2) %>%
                       summarize(ns = n()) %>%
                       pull(ns))
  
  means <- mean %>%
    filter(num >= 10)
  print(means)
  
  #Attach party indicators#
  
  means$party <- as.numeric(rownames(means))
  
  print(means)
  
  
  
  
  #Weighted mean income of all respondents#
  
  avg.p <- partisans %>% 
    summarize(m = weighted.mean(!!variable.interest2,  !!variable.weight2, na.rm = T)) %>% pull(m)
  
  print(avg.p)
  
  #Ratio of party means to overall means#
  
  means$ratio <- means$V1/avg.p
  print(means$ratio)
  
  #Difference between party means and overall means#
  
  means$dif <- means$V1 - avg.p
  print(means$dif)
  #Calculate GGini#
  
  nums <- matrix(nrow = length(means$party), ncol = length(means$party))
  nums <- as.data.frame(nums)
  sums <- NA
  print(class(means))
  print(class(nums))
  
  
  #GGini#
  print(class(means$party))
  print(means)
  print(nums)
  for(i in means$party){
    for(j in means$party){
      nums[i,j] <- means$n.part[i]*means$n.part[j]*
        abs(means$V1[i] - means$V1[j])
    }}
  print(nums)
  sums <- rowSums(nums)
  print(sums)
  ggini <- (1/(2 * avg.p)) * sum(sums)
  
  #GTheil#
  
  theil.component <- NA
  gtheil <- NA
  for(i in means$party){
    theil.component[i] <- means$n.part[i] * means$ratio[i] * log(means$ratio[i])
  }
  
  gtheil <- sum(theil.component, na.rm = T)
  
  #GCOV# 
  
  inner <- NA
  gcov <- NA
  for(i in means$party){
    inner[i] <- means$n.part[i] * (means$dif[i]^2)
  }
  gcov <- (1/avg.p)*sqrt(sum(inner, na.rm = T))
  
  #Weighted#
  
  sdmw <- wt.sd(means$V1, means$n.part)
  
  #Attach to pols#
  
  ess_wave8[ess_wave8$country.name == country & ess_wave8$surveyyear == year, gini.var] <<- ggini
  ess_wave8[ess_wave8$country.name == country & ess_wave8$surveyyear == year, theil.var] <<- gtheil
  ess_wave8[ess_wave8$country.name == country & ess_wave8$surveyyear == year, cov.var] <<- gcov
  ess_wave8[ess_wave8$country.name == country & ess_wave8$surveyyear == year, sdmw.var] <<- sdmw
}

#Income#
for(i in c(1:19)){
  horizontal3(data = ess8,
              variable.interest = "inc47",
              variable.interest2 = inc47,
              variable.party = "clpart",
              variable.party2 =clpart,
              variable.partisan = "clsprty",
              variable.partisan2 = clsprty,
              variable.country = "country.name",
              variable.country2 = country.name,
              variable.year = "surveyyear",
              variable.year2 = surveyyear,
              variable.weight = "pspwght",
              variable.weight2 = pspwght,
              country = ess_wave8[i,1], 
              year = ess_wave8[i,2],
              gini.var = "ggini_inc_es",
              theil.var= "gtheil_inc_es",
              cov.var = "gcov_inc_es",
              sdmw.var = "sdmw_inc_es")
}

#Combine all ESS waves#

ess <- bind_rows(ess_waves, ess_wave8) %>%
  arrange(country.name, surveyyear)


#Combine ess and CSES#

full_dat <- full_join(ess, CSES, by = c("country.name", "surveyyear")) %>%
  arrange(country.name, surveyyear) %>%
  dplyr::rename(electionyear = surveyyear)

data_merge <- data.frame(country.name = unique(full_dat$country.name),
                         electionyear = rep(1995:2019, times = length(unique(full_dat$country.name))))

data_merge <- data_merge %>%
  arrange(country.name, electionyear) %>%
  left_join(full_dat, by = c("country.name", "electionyear"))

library(schoolmath)
library(dplyr)

data_merge <- data_merge %>%
  group_by(country.name) %>% #group by country ECON
  mutate(lag_ggini_inc_es = lag(ggini_inc_es)) %>% #created lagged version
  mutate(lead_ggini_inc_es = lead(ggini_inc_es)) %>% #create lead version
  mutate(lag_gtheil_inc_es = lag(gtheil_inc_es)) %>% #created lagged version
  mutate(lead_gtheil_inc_es = lead(gtheil_inc_es)) %>% #create lead version
  mutate(lag_gcov_inc_es = lag(gcov_inc_es)) %>% #created lagged version
  mutate(lead_gcov_inc_es = lead(gcov_inc_es)) %>% #create lead version
  mutate(lag_sdmw_inc_es = lag(sdmw_inc_es)) %>% #created lagged version
  mutate(lead_sdmw_inc_es = lead(sdmw_inc_es)) %>% #create lead version
  ungroup(data_merge)

data_merge <- data_merge %>%#Income
  mutate(mean_ggini_inc_es = rowMeans(dplyr::select(data_merge, lag_ggini_inc_es, lead_ggini_inc_es), na.rm = T))%>%
  mutate(es_ggini_inc = ifelse(is.even(electionyear), 
                               ggini_inc_es, 
                               mean_ggini_inc_es))%>%
  mutate(mean_gtheil_inc_es = rowMeans(dplyr::select(data_merge, lag_gtheil_inc_es, lead_gtheil_inc_es), na.rm = T))%>%
  mutate(es_gtheil_inc = ifelse(is.even(electionyear), 
                                gtheil_inc_es, 
                                mean_gtheil_inc_es))%>%
  mutate(mean_gcov_inc_es = rowMeans(dplyr::select(data_merge, lag_gcov_inc_es, lead_gcov_inc_es), na.rm = T))%>%
  mutate(es_gcov_inc = ifelse(is.even(electionyear), 
                              gcov_inc_es, 
                              mean_gcov_inc_es))%>%
  mutate(mean_sdmw_inc_es = rowMeans(dplyr::select(data_merge, lag_sdmw_inc_es, lead_sdmw_inc_es), na.rm = T))%>%
  mutate(es_sdmw_inc = ifelse(is.even(electionyear), 
                              sdmw_inc_es, 
                              mean_sdmw_inc_es))

#####Combining ESS and CSES#####

###GCOV###

#Model Predicting CSES from ESS#

library(lme4)
mod.1 <- lmer(gcov_inc_cs ~ es_gcov_inc + (1 | country.name),
              data = data_merge)
summary(mod.1)

library(MASS)
set.seed(1004)
mods <- mvrnorm(n = 5, mu = fixef(mod.1), Sigma = vcov(mod.1))

library(plyr)
preds <- alply(mods, 1, function(x) {
  predict(mod.1,
          re.form = ~ 0, # sample all REs
          newparams = list(beta = x),# use sampled fixed effects
          newdata = data_merge,
          allow.new.levels = TRUE) 
}
)
preds.gcov <- data.frame(preds1.gcov = preds[[1]], preds2.gcov = preds[[2]],
                         preds3.gcov = preds[[3]], preds4.gcov = preds[[4]],
                         preds5.gcov = preds[[5]])


preds.gcov$country.name <- data_merge$country.name
preds.gcov$electionyear <- data_merge$electionyear

#Add predictions onto data_merge#

data_merge <- left_join(data_merge, preds.gcov, by = c("country.name", "electionyear"))

#Combine to make complete variables
data_merge <- as.data.frame(data_merge)

data_merge$gcov_inc1 <- ifelse(!is.na(data_merge$gcov_inc_cs), 
                               data_merge$gcov_inc_cs, 
                               data_merge$preds1.gcov)
summary(data_merge$gcov_inc1)

data_merge$gcov_inc2 <- ifelse(!is.na(data_merge$gcov_inc_cs), 
                               data_merge$gcov_inc_cs, 
                               data_merge$preds2.gcov)
summary(data_merge$gcov_inc2)

data_merge$gcov_inc3 <- ifelse(!is.na(data_merge$gcov_inc_cs), 
                               data_merge$gcov_inc_cs, 
                               data_merge$preds3.gcov)
summary(data_merge$gcov_inc3)

data_merge$gcov_inc4 <- ifelse(!is.na(data_merge$gcov_inc_cs), 
                               data_merge$gcov_inc_cs, 
                               data_merge$preds4.gcov)
summary(data_merge$gcov_inc4)

data_merge$gcov_inc5 <- ifelse(!is.na(data_merge$gcov_inc_cs), 
                               data_merge$gcov_inc_cs, 
                               data_merge$preds5.gcov)
summary(data_merge$gcov_inc5)

###GGINI###

mod.2 <- lmer(ggini_inc_cs ~ es_ggini_inc + (1 | country.name),
              data = data_merge)
summary(mod.2)

mods <- mvrnorm(n = 5, mu = fixef(mod.2), Sigma = vcov(mod.2))

preds.ggini <- alply(mods, 1, function(x) {
  predict(mod.2,
          re.form = ~ 0, # sample all REs
          newparams = list(beta = x),# use sampled fixed effects
          newdata = data_merge,
          allow.new.levels = TRUE) 
}
)
preds.ggini <- data.frame(preds1.ggini = preds.ggini[[1]], preds2.ggini = preds.ggini[[2]],
                          preds3.ggini = preds.ggini[[3]], preds4.ggini = preds.ggini[[4]],
                          preds5.ggini = preds.ggini[[5]])


preds.ggini$country.name <- data_merge$country.name
preds.ggini$electionyear <- data_merge$electionyear

#Add predictions onto data_merge#

data_merge <- left_join(data_merge, preds.ggini, by = c("country.name", "electionyear"))

#Combine to make complete variables
data_merge <- as.data.frame(data_merge)

data_merge$ggini_inc1 <- ifelse(!is.na(data_merge$ggini_inc_cs), 
                                data_merge$ggini_inc_cs, 
                                data_merge$preds1.ggini)
summary(data_merge$ggini_inc1)

data_merge$ggini_inc2 <- ifelse(!is.na(data_merge$ggini_inc_cs), 
                                data_merge$ggini_inc_cs, 
                                data_merge$preds2.ggini)
summary(data_merge$ggini_inc2)

data_merge$ggini_inc3 <- ifelse(!is.na(data_merge$ggini_inc_cs), 
                                data_merge$ggini_inc_cs, 
                                data_merge$preds3.ggini)
summary(data_merge$ggini_inc3)

data_merge$ggini_inc4 <- ifelse(!is.na(data_merge$ggini_inc_cs), 
                                data_merge$ggini_inc_cs, 
                                data_merge$preds4.ggini)
summary(data_merge$ggini_inc4)

data_merge$ggini_inc5 <- ifelse(!is.na(data_merge$ggini_inc_cs), 
                                data_merge$ggini_inc_cs, 
                                data_merge$preds5.ggini)
summary(data_merge$ggini_inc5)
###GTHEIL###

mod.3 <- lmer(gtheil_inc_cs ~ es_gtheil_inc + (1 | country.name),
              data = data_merge)
summary(mod.3)

mods <- mvrnorm(n = 5, mu = fixef(mod.3), Sigma = vcov(mod.3))

preds.gtheil <- alply(mods, 1, function(x) {
  predict(mod.3,
          re.form = ~ 0, # sample all REs
          newparams = list(beta = x),# use sampled fixed effects
          newdata = data_merge,
          allow.new.levels = TRUE) 
}
)
preds.gtheil <- data.frame(preds1.gtheil = preds.gtheil[[1]], preds2.gtheil = preds.gtheil[[2]],
                           preds3.gtheil = preds.gtheil[[3]], preds4.gtheil = preds.gtheil[[4]],
                           preds5.gtheil = preds.gtheil[[5]])


preds.gtheil$country.name <- data_merge$country.name
preds.gtheil$electionyear <- data_merge$electionyear

#Add predictions onto data_merge#

data_merge <- left_join(data_merge, preds.gtheil, by = c("country.name", "electionyear"))

#Combine to make complete variables
data_merge <- as.data.frame(data_merge)

data_merge$gtheil_inc1 <- ifelse(!is.na(data_merge$gtheil_inc_cs), 
                                 data_merge$gtheil_inc_cs, 
                                 data_merge$preds1.gtheil)
summary(data_merge$gtheil_inc1)

data_merge$gtheil_inc2 <- ifelse(!is.na(data_merge$gtheil_inc_cs), 
                                 data_merge$gtheil_inc_cs, 
                                 data_merge$preds2.gtheil)
summary(data_merge$gtheil_inc2)

data_merge$gtheil_inc3 <- ifelse(!is.na(data_merge$gtheil_inc_cs), 
                                 data_merge$gtheil_inc_cs, 
                                 data_merge$preds3.gtheil)
summary(data_merge$gtheil_inc3)

data_merge$gtheil_inc4 <- ifelse(!is.na(data_merge$gtheil_inc_cs), 
                                 data_merge$gtheil_inc_cs, 
                                 data_merge$preds4.gtheil)
summary(data_merge$gtheil_inc4)

data_merge$gtheil_inc5 <- ifelse(!is.na(data_merge$gtheil_inc_cs), 
                                 data_merge$gtheil_inc_cs, 
                                 data_merge$preds5.gtheil)
summary(data_merge$gtheil_inc5)

###Weighted Standard Deviation###

mod.4 <- lmer(sdmw_inc_cs ~ es_sdmw_inc + (1 | country.name),
              data = data_merge)
summary(mod.4)

mods <- mvrnorm(n = 5, mu = fixef(mod.4), Sigma = vcov(mod.4))

preds.sdmw <- alply(mods, 1, function(x) {
  predict(mod.4,
          re.form = ~ 0, # sample all REs
          newparams = list(beta = x),# use sampled fixed effects
          newdata = data_merge,
          allow.new.levels = TRUE) 
}
)
preds.sdmw <- data.frame(preds1.sdmw = preds.sdmw[[1]], preds2.sdmw = preds.sdmw[[2]],
                         preds3.sdmw = preds.sdmw[[3]], preds4.sdmw = preds.sdmw[[4]],
                         preds5.sdmw = preds.sdmw[[5]])


preds.sdmw$country.name <- data_merge$country.name
preds.sdmw$electionyear <- data_merge$electionyear

#Add predictions onto data_merge#

data_merge <- left_join(data_merge, preds.sdmw, by = c("country.name", "electionyear"))

#Combine to make complete variables
data_merge <- as.data.frame(data_merge)

data_merge$sdmw_inc1 <- ifelse(!is.na(data_merge$sdmw_inc_cs), 
                               data_merge$sdmw_inc_cs, 
                               data_merge$preds1.sdmw)
summary(data_merge$sdmw_inc1)

data_merge$sdmw_inc2 <- ifelse(!is.na(data_merge$sdmw_inc_cs), 
                               data_merge$sdmw_inc_cs, 
                               data_merge$preds2.sdmw)
summary(data_merge$sdmw_inc2)

data_merge$sdmw_inc3 <- ifelse(!is.na(data_merge$sdmw_inc_cs), 
                               data_merge$sdmw_inc_cs, 
                               data_merge$preds3.sdmw)
summary(data_merge$sdmw_inc3)

data_merge$sdmw_inc4 <- ifelse(!is.na(data_merge$sdmw_inc_cs), 
                               data_merge$sdmw_inc_cs, 
                               data_merge$preds4.sdmw)
summary(data_merge$sdmw_inc4)

data_merge$sdmw_inc5 <- ifelse(!is.na(data_merge$sdmw_inc_cs), 
                               data_merge$sdmw_inc_cs, 
                               data_merge$preds5.sdmw)
summary(data_merge$sdmw_inc5)


data_merge <- dplyr::select(data_merge, country.name, electionyear,
                            ggini_inc_cs, gtheil_inc_cs, gcov_inc_cs, sdmw_inc_cs,
                            gcov_inc1, gcov_inc2, gcov_inc3, gcov_inc4, gcov_inc5,
                            ggini_inc1, ggini_inc2, ggini_inc3, ggini_inc4, ggini_inc5,
                            gtheil_inc1, gtheil_inc2, gtheil_inc3, gtheil_inc4, gtheil_inc5,
                            sdmw_inc1, sdmw_inc2, sdmw_inc3, sdmw_inc4, sdmw_inc5)

######Attach ESS and CSES data to pols#####

pols <- left_join(pols, data_merge, by = c("country.name", "electionyear")) %>%
  arrange(country.name, electionyear)

#Drop duplicated Greek Elections

pols <- pols[-c(56, 58),]

